As always, we begin by importing the data and preprocessing it for our main algorithm. For NMDS, we can use an n x p (samples x species/asv) matrix derived from the otu table.
ps <- readRDS('data/16Sdada2/phyloseq.filtered.rds')
mat <- data.frame(otu_table(ps))
dim(mat) #ensure samples x species, should have many more species
## [1] 927 44455
#preprocess metadata
timelevs <- c("Jul_2022","Nov_2022","Feb_2023", "Aug_2023","Nov_2023", "Feb_2024","Aug_2024")
treatlevs <- c("Baseline","Control","Water","N", "N+P","N+Water","P","P+Water","N+P+Water")
treatcols <- c("#E3E571","#E5AE71","#71C4E5","#53DF7B","#C0E5A4","#BCE7D6","#DF53B7","#E7BCCC","#804DEF")
mhablevs <- c("SI","SG","SSI","SSG")
mcols <- c("#B897DD","#BCDD97","#B43FEE","#78EE3F")
mshapes <- c(25,10,17,16)
meta <- data.frame(sample_data(ps)) %>%
rename(Sample=SampleID) %>%
mutate(SampleDate=factor(SampleDate, levels=timelevs)) %>%
mutate(Treatment=factor(Treatment, levels=treatlevs)) %>%
mutate(Microhabitat=factor(Microhabitat, levels=mhablevs))
NMDS (Non-metric MultiDimensional Scaling) is a dimension reduction
technique that works well for ordinal data, due to the fact that this
method does not assume that the data are embedded in euclidean space
like, say, PCA does. It does, however, require calculating the distance
matrix which is an n x n matrix, so it can easily fit in memory. There
is a more automated way to do this, with metaMDS(), but we
will make use of the distance matrix in subsequent analyses.
X <- vegdist(mat, method='bray')
saveRDS(X, 'data/16Svegan/bray.dist.rds')
From this distance matrix, we can calculate the NMDS coordinates of
each sample using the metaMDS() function. We choose to
reduce the data into k=3 dimensions and return not only
NMDS coordinates for samples, but also species using the
wascores = T argument. The arguments try and
trymax limit the minimum and maximum numbers of random
starts in search of stable solutions, each random seed requires its own
calculation, the number of which that are allowed to concurrently run is
controlled by the argument parallel. The argument
maxit controls the maximum number of gradient descent steps
allowed in each random seed.
## Run 0 stress 0.08535412
## Run 1 stress 0.08393647
## ... New best solution
## ... Procrustes: rmse 0.01091119 max resid 0.07994729
## Run 2 stress 0.08494292
## Run 3 stress 0.08362004
## ... New best solution
## ... Procrustes: rmse 0.009227818 max resid 0.0803566
## Run 4 stress 0.08343208
## ... New best solution
## ... Procrustes: rmse 0.008600853 max resid 0.1196124
## Run 5 stress 0.08282884
## ... New best solution
## ... Procrustes: rmse 0.006828928 max resid 0.06638029
## Run 6 stress 0.0828019
## ... New best solution
## ... Procrustes: rmse 0.007061908 max resid 0.07771071
## Run 7 stress 0.08214676
## ... New best solution
## ... Procrustes: rmse 0.008286633 max resid 0.1162608
## Run 8 stress 0.08551763
## Run 9 stress 0.08378161
## Run 10 stress 0.08164285
## ... New best solution
## ... Procrustes: rmse 0.007488201 max resid 0.1172257
## Run 11 stress 0.08691273
## Run 12 stress 0.08588862
## Run 13 stress 0.08463128
## Run 14 stress 0.08403151
## Run 15 stress 0.08450282
## Run 16 stress 0.08611117
## Run 17 stress 0.08620623
## Run 18 stress 0.0855382
## Run 19 stress 0.08569671
## Run 20 stress 0.0844119
## Run 21 stress 0.08178086
## ... Procrustes: rmse 0.006852379 max resid 0.07785055
## Run 22 stress 0.08394713
## Run 23 stress 0.08424212
## Run 24 stress 0.08496977
## Run 25 stress 0.08601604
## Run 26 stress 0.08546292
## Run 27 stress 0.08636198
## Run 28 stress 0.08346239
## Run 29 stress 0.08447204
## Run 30 stress 0.08487116
## Run 31 stress 0.08532526
## Run 32 stress 0.08588519
## Run 33 stress 0.08166903
## ... Procrustes: rmse 0.005509128 max resid 0.05950757
## Run 34 stress 0.08505631
## Run 35 stress 0.08432983
## Run 36 stress 0.08532478
## Run 37 stress 0.08607073
## Run 38 stress 0.08445375
## Run 39 stress 0.08550218
## Run 40 stress 0.08221731
## Run 41 stress 0.08487171
## Run 42 stress 0.08460354
## Run 43 stress 0.0854864
## Run 44 stress 0.08601124
## Run 45 stress 0.08605713
## Run 46 stress 0.08279872
## Run 47 stress 0.08646516
## Run 48 stress 0.0868453
## Run 49 stress 0.08641423
## Run 50 stress 0.08384599
## Run 51 stress 0.0832722
## Run 52 stress 0.08563539
## Run 53 stress 0.08562167
## Run 54 stress 0.08627567
## Run 55 stress 0.08190595
## ... Procrustes: rmse 0.006833873 max resid 0.06890822
## Run 56 stress 0.08404194
## Run 57 stress 0.08409737
## Run 58 stress 0.08612339
## Run 59 stress 0.08558332
## Run 60 stress 0.08335889
## Run 61 stress 0.0846487
## Run 62 stress 0.08326218
## Run 63 stress 0.08449324
## Run 64 stress 0.0864869
## Run 65 stress 0.08466209
## Run 66 stress 0.08660871
## Run 67 stress 0.08485488
## Run 68 stress 0.08429841
## Run 69 stress 0.08683163
## Run 70 stress 0.08474366
## Run 71 stress 0.08393816
## Run 72 stress 0.08449511
## Run 73 stress 0.08362491
## Run 74 stress 0.08289304
## Run 75 stress 0.08395037
## Run 76 stress 0.08392322
## Run 77 stress 0.08632522
## Run 78 stress 0.08464527
## Run 79 stress 0.08290634
## Run 80 stress 0.08672221
## Run 81 stress 0.08620915
## Run 82 stress 0.08713077
## Run 83 stress 0.08487217
## Run 84 stress 0.08604299
## Run 85 stress 0.08426787
## Run 86 stress 0.08538565
## Run 87 stress 0.08523612
## Run 88 stress 0.08670555
## Run 89 stress 0.08503328
## Run 90 stress 0.08655829
## Run 91 stress 0.08285318
## Run 92 stress 0.08521967
## Run 93 stress 0.08563759
## Run 94 stress 0.08349429
## Run 95 stress 0.08591212
## Run 96 stress 0.08641372
## Run 97 stress 0.08396861
## Run 98 stress 0.08236374
## Run 99 stress 0.08365024
## Run 100 stress 0.08492932
## Run 101 stress 0.08270296
## Run 102 stress 0.08355232
## Run 103 stress 0.08583005
## Run 104 stress 0.08271651
## *** Best solution was not repeated -- monoMDS stopping criteria:
## 13: no. of iterations >= maxit
## 91: scale factor of the gradient < sfgrmin
Here we ensure that the solution we found is relatively well suited to the dataset. We do this by checking the stress (for NMDS, <0.1 is desirable, >0.2 is unreliable), goodness of fit, and by plotting the observed distance in the distance matrix against the euclidean distance in NMDS space. We expect the correlation to be positive, but not necessarily linear.
print(paste0("Stress of NMDS model: ", out$stress))
## [1] "Stress of NMDS model: 0.0816428472425574"
gdat <- data.frame(out$points)
gdat$goodness <- goodness(out)
gdat$Sample <- rownames(gdat)
gdat <- left_join(gdat, meta, by='Sample')
ggplot(gdat)+
geom_jitter(aes(x=SampleDate, y=goodness, color=Treatment, shape=Microhabitat),
size=4, height = 0, width=0.25, alpha=0.85)+
theme_bw()+
scale_color_manual(values = treatcols)+
scale_shape_manual(values = mshapes)+
xlab("Date")+
ylab("Goodness of Fit")
stressplot(out)
For now, we only graph in two dimensions at a time. Color is mapped to Treatment and shape is mapped to Microhabitat.
ggplot(gdat)+
geom_point(aes(x=MDS1, y=MDS2, color=Treatment, shape=Microhabitat),
size=3, alpha=0.85)+
theme_bw()+
theme(text = element_text(size=15))+
scale_color_manual(values = treatcols)+
scale_shape_manual(values = mshapes)+
facet_wrap(~SampleDate)+
ggtitle('MDS1 v MDS2')
ggplot(gdat)+
geom_point(aes(x=MDS2, y=MDS3, color=Treatment, shape=Microhabitat),
size=3, alpha=0.85)+
theme_bw()+
theme(text = element_text(size=15))+
scale_color_manual(values = treatcols)+
scale_shape_manual(values = mshapes)+
facet_wrap(~SampleDate)+
ggtitle('MDS2 v MDS3')
sidx <- meta %>% filter(Surface_Subsurface=='S') %>% rownames()
ssidx <- meta %>% filter(Surface_Subsurface=='SS') %>% rownames()
sX <- vegdist(mat[sidx,], method='bray')
saveRDS(sX, 'data/16Svegan/surface.bray.dist.rds')
ssX <- vegdist(mat[ssidx,], method='bray')
saveRDS(ssX, 'data/16Svegan/subsurface.bray.dist.rds')
sout <- metaMDS(sX,
k=3,
wascores = T,
weakties=T,
try=50,
trymax=100,
parallel=8,
maxit=300)
## Run 0 stress 0.09661422
## Run 1 stress 0.0989859
## Run 2 stress 0.0973924
## Run 3 stress 0.09708871
## ... Procrustes: rmse 0.005123623 max resid 0.07594433
## Run 4 stress 0.09783455
## Run 5 stress 0.09810782
## Run 6 stress 0.09890763
## Run 7 stress 0.09863227
## Run 8 stress 0.09941426
## Run 9 stress 0.09661442
## ... Procrustes: rmse 1.834069e-05 max resid 0.0002976793
## ... Similar to previous best
## Run 10 stress 0.1007624
## Run 11 stress 0.09964049
## Run 12 stress 0.1005622
## Run 13 stress 0.09911444
## Run 14 stress 0.1089245
## Run 15 stress 0.098245
## Run 16 stress 0.09896147
## Run 17 stress 0.09852827
## Run 18 stress 0.09693037
## ... Procrustes: rmse 0.002362211 max resid 0.04220401
## Run 19 stress 0.09965828
## Run 20 stress 0.0986033
## Run 21 stress 0.09830208
## Run 22 stress 0.09953155
## Run 23 stress 0.09858351
## Run 24 stress 0.1004599
## Run 25 stress 0.09663079
## ... Procrustes: rmse 0.00124009 max resid 0.02555732
## Run 26 stress 0.09706164
## ... Procrustes: rmse 0.005825789 max resid 0.08202905
## Run 27 stress 0.09968068
## Run 28 stress 0.09823767
## Run 29 stress 0.09697914
## ... Procrustes: rmse 0.004515875 max resid 0.05721353
## Run 30 stress 0.09658552
## ... New best solution
## ... Procrustes: rmse 0.002738656 max resid 0.05721214
## Run 31 stress 0.09963402
## Run 32 stress 0.09852015
## Run 33 stress 0.1011324
## Run 34 stress 0.1054409
## Run 35 stress 0.09661633
## ... Procrustes: rmse 0.0009915242 max resid 0.01799692
## Run 36 stress 0.09856166
## Run 37 stress 0.1059871
## Run 38 stress 0.09891441
## Run 39 stress 0.1070338
## Run 40 stress 0.1001474
## Run 41 stress 0.09674835
## ... Procrustes: rmse 0.002337851 max resid 0.04170742
## Run 42 stress 0.09954126
## Run 43 stress 0.1020981
## Run 44 stress 0.09892237
## Run 45 stress 0.1070284
## Run 46 stress 0.09663198
## ... Procrustes: rmse 0.002932346 max resid 0.05714901
## Run 47 stress 0.0986843
## Run 48 stress 0.09718428
## Run 49 stress 0.0987448
## Run 50 stress 0.09875886
## Run 51 stress 0.09855863
## Run 52 stress 0.0969159
## ... Procrustes: rmse 0.003353065 max resid 0.05698478
## Run 53 stress 0.09962408
## Run 54 stress 0.09795052
## Run 55 stress 0.09741752
## Run 56 stress 0.09998285
## Run 57 stress 0.09874481
## Run 58 stress 0.09834706
## Run 59 stress 0.09865145
## Run 60 stress 0.09862983
## Run 61 stress 0.09862754
## Run 62 stress 0.1003743
## Run 63 stress 0.09822388
## Run 64 stress 0.1001077
## Run 65 stress 0.1002608
## Run 66 stress 0.09831133
## Run 67 stress 0.1002526
## Run 68 stress 0.09844919
## Run 69 stress 0.1058431
## Run 70 stress 0.09873614
## Run 71 stress 0.09969527
## Run 72 stress 0.1005393
## Run 73 stress 0.09822945
## Run 74 stress 0.09663091
## ... Procrustes: rmse 0.002947473 max resid 0.05713764
## Run 75 stress 0.09723153
## Run 76 stress 0.1004608
## Run 77 stress 0.09786056
## Run 78 stress 0.09795364
## Run 79 stress 0.1003378
## Run 80 stress 0.09965766
## Run 81 stress 0.09659763
## ... Procrustes: rmse 0.00130495 max resid 0.02542224
## Run 82 stress 0.0984728
## Run 83 stress 0.09973814
## Run 84 stress 0.09691556
## ... Procrustes: rmse 0.003751358 max resid 0.07767137
## Run 85 stress 0.1000345
## Run 86 stress 0.0984392
## Run 87 stress 0.09814368
## Run 88 stress 0.09658222
## ... New best solution
## ... Procrustes: rmse 0.0004815123 max resid 0.009124852
## ... Similar to previous best
## *** Best solution repeated 1 times
saveRDS(sout, 'data/16Svegan/deca_surface_nmds.rds')
ssout <- metaMDS(ssX,
k=3,
wascores = T,
weakties=T,
try=50,
trymax=100,
parallel=8,
maxit=300)
## Run 0 stress 0.09832418
## Run 1 stress 0.09944446
## Run 2 stress 0.1009685
## Run 3 stress 0.09763545
## ... New best solution
## ... Procrustes: rmse 0.008939268 max resid 0.1000068
## Run 4 stress 0.1007179
## Run 5 stress 0.09773757
## ... Procrustes: rmse 0.006105213 max resid 0.07589726
## Run 6 stress 0.09656974
## ... New best solution
## ... Procrustes: rmse 0.007163635 max resid 0.09821754
## Run 7 stress 0.1046896
## Run 8 stress 0.1003856
## Run 9 stress 0.1008345
## Run 10 stress 0.09680943
## ... Procrustes: rmse 0.007285573 max resid 0.1004825
## Run 11 stress 0.09931471
## Run 12 stress 0.09638622
## ... New best solution
## ... Procrustes: rmse 0.007760488 max resid 0.1002476
## Run 13 stress 0.09639578
## ... Procrustes: rmse 0.004706375 max resid 0.09636973
## Run 14 stress 0.09756402
## Run 15 stress 0.09928784
## Run 16 stress 0.099313
## Run 17 stress 0.09802772
## Run 18 stress 0.1108404
## Run 19 stress 0.1007676
## Run 20 stress 0.09691276
## Run 21 stress 0.09784894
## Run 22 stress 0.09814825
## Run 23 stress 0.09647816
## ... Procrustes: rmse 0.002702533 max resid 0.04267518
## Run 24 stress 0.09840497
## Run 25 stress 0.09943236
## Run 26 stress 0.09945676
## Run 27 stress 0.09761147
## Run 28 stress 0.09700645
## Run 29 stress 0.09671626
## ... Procrustes: rmse 0.007086962 max resid 0.1001671
## Run 30 stress 0.09929211
## Run 31 stress 0.1073013
## Run 32 stress 0.09702065
## Run 33 stress 0.09676101
## ... Procrustes: rmse 0.008229295 max resid 0.100214
## Run 34 stress 0.09659966
## ... Procrustes: rmse 0.004078752 max resid 0.08694724
## Run 35 stress 0.0982182
## Run 36 stress 0.09637659
## ... New best solution
## ... Procrustes: rmse 0.006577366 max resid 0.1002743
## Run 37 stress 0.09942019
## Run 38 stress 0.09633083
## ... New best solution
## ... Procrustes: rmse 0.004781164 max resid 0.1005886
## Run 39 stress 0.09754466
## Run 40 stress 0.09949008
## Run 41 stress 0.09853524
## Run 42 stress 0.09676683
## ... Procrustes: rmse 0.008204353 max resid 0.1003117
## Run 43 stress 0.1002083
## Run 44 stress 0.09771459
## Run 45 stress 0.09932987
## Run 46 stress 0.09972595
## Run 47 stress 0.09811324
## Run 48 stress 0.09775708
## Run 49 stress 0.09686512
## Run 50 stress 0.09633053
## ... New best solution
## ... Procrustes: rmse 5.983411e-05 max resid 0.001196105
## ... Similar to previous best
## Run 51 stress 0.1006391
## Run 52 stress 0.09694655
## Run 53 stress 0.09707558
## Run 54 stress 0.1054848
## Run 55 stress 0.09793567
## Run 56 stress 0.09855197
## *** Best solution repeated 1 times
saveRDS(ssout, 'data/16Svegan/deca_suburface_nmds.rds')
print(paste0("Stress of Surface NMDS model: ", sout$stress))
## [1] "Stress of Surface NMDS model: 0.0965822204575298"
print(paste0("Stress of Subsurface NMDS model: ", ssout$stress))
## [1] "Stress of Subsurface NMDS model: 0.0963305259848007"
gdat <- data.frame(sout$points)
gdat$goodness <- goodness(sout)
gdat$Sample <- rownames(gdat)
gdat <- left_join(gdat, meta, by='Sample')
ggplot(gdat)+
geom_jitter(aes(x=SampleDate, y=goodness, color=Treatment, shape=Microhabitat),
size=4, height = 0, width=0.25, alpha=0.85)+
theme_bw()+
scale_color_manual(values = treatcols)+
scale_shape_manual(values = mshapes)+
xlab("Date")+
ylab("Goodness of Fit")+
ggtitle("Surface GOF")
stressplot(sout)
ggplot(gdat)+
geom_point(aes(x=MDS1, y=MDS2, color=Treatment, shape=Microhabitat),
size=3, alpha=0.85)+
theme_bw()+
theme(text = element_text(size=15))+
scale_color_manual(values = treatcols)+
scale_shape_manual(values = mshapes)+
facet_wrap(~SampleDate)+
ggtitle('Surface MDS1 v MDS2')
gdat <- data.frame(ssout$points)
gdat$goodness <- goodness(ssout)
gdat$Sample <- rownames(gdat)
gdat <- left_join(gdat, meta, by='Sample')
ggplot(gdat)+
geom_jitter(aes(x=SampleDate, y=goodness, color=Treatment, shape=Microhabitat),
size=4, height = 0, width=0.25, alpha=0.85)+
theme_bw()+
scale_color_manual(values = treatcols)+
scale_shape_manual(values = mshapes[3:4])+
xlab("Date")+
ylab("Goodness of Fit")+
ggtitle("Subsurface GOF")
stressplot(ssout)
ggplot(gdat)+
geom_point(aes(x=MDS1, y=MDS2, color=Treatment, shape=Microhabitat),
size=3, alpha=0.85)+
theme_bw()+
theme(text = element_text(size=15))+
scale_color_manual(values = treatcols)+
scale_shape_manual(values = mshapes[3:4])+
facet_wrap(~SampleDate)+
ggtitle('Subsurface MDS1 v MDS2')